/* SRE-FILTER procedure (5/96) to access a "SWISH" index file. Requires that a SWISH index have been previously built using SWISH.EXE (available from: ftp://ftp.eit.com/pub/web.software/swish/ (uses some code from DOSEARC.CMD by 3/20/1996, Kevin Vigor) In addition, will attempt to make a short description (if desired), using either a name= (or http-equiv=) "description" header element, or the first several ... elements. Options: keyword: List of words to search for, with OR AND NOT as logical controls (and is assumed). Note, there is NO phrase support! index: Index file to search for (typically provided as type=hidden) option1..option9: list of options. Valid ones include -t HBethc -m #lines header: H1 header to use (default used if none provided ) comment1..commentx: Comments to place (using ) under header conditon: (=YES,AND,NOT) Or,and,not substituted between keywords. Default is "and" searches. (no substitution occurs if NOT, OR or AND is between words in keyword list) This will not work well with (phrases) or in combination with complex user specified search strings. summary = NO YES CREATE no = no summary, but check for existence yes = look for name="description" (or http-equiv=.) and use the "contents= field) create = If no name=found, then use the first several ' call lineout tempfile, "Index search results /title>" call lineout tempfile, "</head><body>" /* Begin the result body.*/ tempdir=strip(translate(tempdir,'\','/'),'t','\') TEMPOUT = dostempName(TEMPDIR'\ST$?????.OUT') if tempout = "0" | tempfile="" then do call lineout tempfile," <STRONG> ERROR: Could not access working directory </STRONG>" call lineout tempfile," </BODY> </HTML> " return 'FILE ERASE TYPE text/html NAME' tempfile end keywords='help' index_file="INDEX.SWI" swopts=' ' aheader="Search the site-index " ncmt=0 ; door=0 summary=1 do until list="" parse var list v1 '&' list parse var v1 avar '=' aval ; avar=translate(avar) if abbrev(avar,"KEYWORD")=1 then do keywords=packur(translate(aval,' ','+')) end if abbrev(avar,'HEADER')=1 then do aheader=packur(translate(aval,' ','+')) end if abbrev(avar,'INDEX')=1 then do indxfile=aval end if abbrev(avar,'COMMENT')=1 then do ncmt=ncmt+1 comments.ncmt=translate(aval, ' ','+'||'00090d0a'x) end if abbrev(avar,'COND')=1 then do select when abbrev(translate(aval),'Y')=1 then door=' OR ' when abbrev(translate(aval),'O')=1 then door=' OR ' when abbrev(translate(aval),'N')=1 then door=' NOT ' otherwise door=0 end end if abbrev(avar,'SUMMARY')=1 then do tt=translate(aval) summary=0 if tt="NO" then summary=1 if tt="YES" then summary=2 if tt="CREATE" then summary=3 end if abbrev(avar,'OPTION')=1 then do swopts=swopts||" "||translate(aval,' ','+') end end call lineout tempfile,' <h1> ' aheader ' </h1> ' do mm=1 to ncmt call lineout tempfile,' <em> ' comments.mm ' </em> <br>' end twords=translate(keywords) ; srchwords="" /* remove silly srchwords */ do mm=1 to words(twords) aword=word(twords,mm) if wordpos(aword,'AND NOT OR')>0 then iterate srchwords=srchwords||" "||aword end if door<>0 & words(keywords)>1 then do /* insert not / or into keyword list */ tmp=word(keywords,1) ; wasand=0 do mmm=2 to words(keywords) aww=word(keywords,mmm) ; taw=strip(translate(aww)) if wasand=1 then do tmp=tmp||' '||aww wasand=0 iterate end if taw="OR"| taw="AND" | taw="NOT"then do tmp=tmp||' '||aww wasand=1 iterate end tmp=tmp||door||aww end keywords=tmp end t1='swish -f '||INDXfile ||' -w '||keywords||' '||swopts||' > '||tempout foodir=directory(servdir) address cmd t1 address foodir=directory(foodir) /* get results */ gotem=fileread(tempout,'filelines',,'e') goo=sysfiledelete(tempout) foo=filelines.gotem /* --- sample to test remote url retrieval filelines.gotem='100 http://www.yahoo.com "THis is yahoo " 1 ' gotem=gotem+1 filelines.gotem=foo ---------- */ if gotem=0 then do call lineout tempfile," <STRONG> ERROR: Problem searching index </STRONG>" call lineout tempfile," </BODY> </HTML> " return 'FILE ERASE TYPE text/html NAME' tempfile end igot=0 do mm=1 to gotem RLINE = filelines.mm IF RLINE = '' THEN leave IF RLINE = '.' THEN leave /* Skip comment lines and empty lines. */ IF abbrev(RLINE,'#') = 1 THEN iterate /* Split the line into fields. */ PARSE VAR RLINE R_SCORE R_FILE R_stuff foo1=words(r_stuff) r_title=delword(r_stuff,foo1) r_position=word(r_stuff,foo1) /* Make sure this looks valid (i.e. that position and score are both numeric variables). */ IF VERIFY(R_SCORE, '1234567890') <> 0 | VERIFY(R_POSITION, '1234567890') <> 0 THEN do aa=strip(rline) if abbrev(translate(aa),'SEARCH WORDS:') then do call lineout tempfile,'<p><strong> Searching for: </strong> ' parse var rline foo ':' lookme call lineout tempfile,' <code> ' lookme '</code>' end iterate end igot=igot+1 if igot=1 then do call lineout tempfile,'<h3> Results of search </h3> <dl> ' end /* Spit out this reference as a HTML link. */ r_file=translate(r_file,'/','\') r_title=strip(strip(r_title),,'"') call lineout tempfile,'<dt> <A href=' r_file '>' r_title '</a>' call lineout tempfile,' <em> Score= ' r_score ' </em> ' call lineout tempfile,' <code> , ' r_position 'bytes </code> <br>' if summary>0 then do foo=make_summary(r_file,summary,srchwords) end end if igot>0 then call lineout tempfile,'</dl>' if gotit>0 then call lineout tempfile,"<p> <em> Total of " igot " matches </em>" call lineout tempfile,'</body></html>' return 'FILE ERASE TYPE text/html NAME' tempfile /* ------------------------------------- */ /* write summary info */ make_summary:procedure expose tempfile ddir macrospace_input virtual_file parse arg aurl,asummary, srchwords atype=translate(sref_mediatype(aurl)) /* get mime type */ /* strip out http://a.b.c/ */ ishttp=abbrev(strip(translate(aurl)),"HTTP://") if ishttp=1 then do filename=aurl atype="TEXT/HTML" end if ishttp=0 then do afilenam=sref_do_virtual(ddir,aurl,macrospace_input,virtual_file) if afilenam=0 then do call lineout tempfile,'<dd> <code> Rmote URL not available </code> ' return 0 end eek=sysfiletree(afilenam,'aflist','F') /* check for existence*/ if eek<>0 | aflist.0=0 then do /* error */ call lineout tempfile,'<dd> <code> File not available </code> ' return 0 end if asummary=1 then do /* file exist,return a blank line */ call lineout tempfile,'<dd> ' return 0 end end /* If text/plain, return first 200 characters */ if atype="TEXT/PLAIN" then do if asummary=2 then do call lineout tempfile,'<dd> <code> Summary not available </code> ' return 0 end /* else, asummary must be 3 */ if ishttp=1 then do stuff=Sref_get_url(aurl,10000) if stuff=0 then do call lineout tempfile,'<dd> <code> Summary not available </code> ' return 0 end end else do filename=aflist.1 filename=strip(word(aflist.1,words(aflist.1))) alen=min(chars(filename),300) stuff=charin(filename,1,alen) end wow=space(translate(stuff,' ','00090a0d1a1b'x)) wow=sref_replacestrg(wow,'<','<','ALL') wow=sref_replacestrg(wow,'>','>','ALL') do jmm=1 to words(srchwords) aword=strip(word(srchwords,jmm)) wow=sref_make_block(aword,wow,'<b>','</b>') /* highlight matches */ end call lineout tempfile,'<dd> (beginning) ' wow return 0 end /* if not html, return no summary*/ if atype<>"TEXT/HTML" then do call lineout tempfile,'<dd> <code> Summary not available </code> ' return 0 end /* if here-- html, summary > 1 */ /* and the url points to a legit file; read it in (up to 10000 characters */ if ishttp=1 then do stuff=sref_get_url(aurl,10000) if stuff=0 then do call lineout tempfile,'<dd> <code> Summary not available </code> ' return 0 end end else do filename=aflist.1 filename=strip(word(aflist.1,words(aflist.1))) alen=min(chars(filename),10000) stuff=charin(filename,1,alen) end stuff=space(translate(stuff,' ','00090a0d1a1b'x)) url_title=0 wow=look_header(filename) if wow<>0 then do do jmm=1 to words(srchwords) aword=strip(word(srchwords,jmm)) wow=sref_make_block(aword,wow,'<b>','</b>') /* highlight matches */ end call lineout tempfile,'<dd> ' wow return 0 end if asummary<>3 then do /* return if not CREATE */ call lineout tempfile,'<dd> <code> Explicit summary not available</code> ' return 0 end WOW=LOOK_HTAG() if wow<>0 then do do jmm=1 to words(srchwords) aword=strip(word(srchwords,jmm)) wow=sref_make_block(aword,wow,'<b>','</b>') /* highlight matches */ end call lineout tempfile,'<dd> <code>' call lineout tempfile, " " wow call lineout tempfile,'</code>' return 0 end if url_title<>0 then call lineout tempfile,'<dd> <code> ' url_title ' </code> ' else call lineout tempfile,'<dd> <code> Summary not available </code> ' return 0 /* ----------------------------------------------------------------------- */ /* Look for "desc" field in header */ /* ----------------------------------------------------------------------- */ look_header: procedure expose stuff url_title parse arg afile dowrite=0 do until stuff="" parse var stuff p1 '<' tag '>' stuff if translate(word(tag,1))="HEAD" then do /* now in head !*/ dowrite=1 iterate end if dowrite=0 then iterate /* wait till we get into head .. */ if translate(word(tag,1))="/HEAD" then /* out of head, all done ! */ leave /* IT IS A TITLE TAG? */ if translate(word(tag,1))="TITLE" then do parse var stuff url_title '<' footag '>' stuff end /* is it a META HTTP-EQUIV or a META NAME ? */ if translate(word(tag,1))="META" then do parse var tag ameta atype '=' rest tatype=translate(atype) if tatype="HTTP-EQUIV" | tatype="NAME" then do parse var rest aval1 rest REST=STRIP(REST) aval1=strip(aval1) ; aval1=strip(aval1,,'"') if abbrev(translate(aval1),'DESC')<>1 then iterate aval2=" " foo1=ABBREV(translate(rest),'CONTENT') if foo1>0 then do PARSE VAR REST FOO '=' AVAL2 aval2=strip(aval2) aval2=strip(aval2,'b','"') WOW=LEFT(AVAL2,500) wow=sref_replacestrg(wow,'<','<','ALL') wow=sref_replacestrg(wow,'>','>','ALL') return WOW end end /* name or http-equiv */ end /* meta */ end /* stuff */ if stuff="" then say "Warning: </head> tag NOT found: " afile return 0 /* ----------------------------------------------------------------------- */ /* Extract <hn> fields */ /* ----------------------------------------------------------------------- */ look_htag: procedure expose stuff filename stuff0=left(stuff,1000) amessage="" dowrite=0 do until stuff="" parse var stuff p1 '<' tag '>' stuff ttag=translate(word(tag,1)) if wordpos(ttag,' H1 H2 H3 H4 TITLE')>0 THEN DO /* grab stuff */ parse var stuff amess '<' tag2 '>' stuff amessage=amessage||amess||'<b> | </b>' end end if amessage="" then do /* getting desperate -- grab any old words! */ do until stuff0="" parse var stuff0 p1 '<' tag '>' stuff0 amessage=amessage||' '||p1 end end if amessage="" then return 0 amessage=left(amessage,300) /* keep it short */ return amessage